/**********************************************************************

DO FILE SUMMARY

This file tinkers with NHANES III to do measurement-adjusted diabetes prevalence
and % diagnosed using the HPACC dataset.

David Flood

Date: March 28, 2023

**********************************************************************/


clear
cls
version 16
set more off
capture log close
 
* Set graph options and scheme
set scheme s1color
graph set window fontface "Arial"

* Use the macro $S_DATE
global date : di %tdCCYY.NN.DD date("$S_DATE","DMY")

* use
use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Dataset/2021_09_23 - Appended dataset.dta", clear

* Set directory
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/"

* Log
log using "Log files/log_${date}.log", replace

/******************************************************************************
Prepare dataset
*******************************************************************************/

* Random numbers
// gen random_number = runiform()
// drop if random_number > 0.5

* Dropping countries
drop if country == "Albania" // pre 2010
drop if country == "Belize" // pre 2010
drop if country == "Brazil" // to merge in PNS lab data
drop if country == "Cabo Verde" // pre 2010
// drop if country == "Cambodia"	
// drop if country == "Chile"	
drop if country == "China" // pre 2010
// drop if country == "Comoros"	
// drop if country == "Costa Rica"	
drop if	country == "Egypt" // no fbg
// drop if country == "Eritrea"	
drop if	country == "Gambia" // no fbg
drop if	country == "Ghana" // pre 2010
drop if	country == "Fiji" //  using STEPS instead of EHS, appended below
drop if	country == "Grenada" // no fbg
drop if country == "Indonesia" // no fbg	
drop if country == "India" // no fbg	
drop if	country == "Kazakhstan" // no fbg
// drop if country == "Laos"	
// drop if country == "Lesotho"	
// drop if country == "Liberia"	
drop if country == "Libya" // in 2009
drop if country == "Malawi" // use 2017 below
// drop if country == "Marshall Islands"
drop if	country == "Mexico" // use ENSANUT
drop if	country == "Mongolia" // using Mongolia 2019, appended below
drop if country == "Mozambique" // using STEPS 2015 below
drop if country == "Namibia"
drop if country == "Niger" // pre 2010
drop if country == "Peru" // no fbg
// drop if country == "Romania" // no PSU variation so don't want to use
drop if country == "Russian Federation" // pre 2010
// drop if country == "Rwanda"
// drop if country == "Samoa"
// drop if country == "Sao Tome and Principe"
// drop if country == "Seychelles" // single PSU
drop if country == "Sierra Leone" // pre 2010
drop if country == "South Africa" // no fasting glucose
drop if country == "South Africa DHS"
// drop if country == "Tanzania"
// drop if country == "Togo"
drop if country == "Tonga" // no fbg
drop if country == "Ukraine" // pre 2010
// drop if country == "Vanuatu"
// drop if country == "Zanzibar"


* David self cleaning in aspirin paper ------------------------------------------------------
	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Aspirin paper"
	
	* Afghanistan 2018
	append using  "Analysis/Cleaning/Self cleaned/Afghanistan STEPS 2019/afghanistan steps 2022.07.12.dta", nolabel 
			
	* Armenia
	append using "Analysis/Cleaning/Self cleaned/Armenia STEPS 2016/armenia steps 2023.02.22.dta", nolabel
	
	* Bermuda -> messes up imputation as few undiagnosed people
	// append using "Analysis/Cleaning/Self cleaned/Bermuda STEPS 2014/Bermuda_lipid_clean 2022.07.12.dta", nolabel
	
	* Bolivia STEPS 2019: no fbg
// 		append using "Analysis/Cleaning/Self cleaned/Bolivia STEPS 2019/Aspirin_lipid_clean 2023.02.17.dta", nolabel
	
	* Brunei (RA also cleaning currently)
	append using "Analysis/Cleaning/Self cleaned/Brunei STEPS 2015/Brunei_lipid_clean 2022.07.12.dta", nolabel
	
	* Cabo Verde STEPS 2020
	append using "Analysis/Cleaning/Self cleaned/Cabo Verde STEPS 2020/Cabo_verde_lipid_clean 2023.02.17.dta", nolabel
	
// 	* Czech Republic -> don't want to bother as no PSU variation
	append using "Analysis/Cleaning/Self cleaned/Czech Republic EHES 2019/czech_aspirin_clean 2023.03.10.dta", nolabel
	
	* England -> no fasting glucose
// 	append using "Analysis/Cleaning/Self cleaned/England HSE 2013-14/england_aspirin_clean 2023.02.22.dta", nolabel
	
	* Ethiopia 2015
	append using "Analysis/Cleaning/Self cleaned/Ethiopia STEPS 2015/Ethiopia_lipid_clean 2023.02.19.dta", nolabel force

	* Jordan 2015
	append using "Analysis/Cleaning/Self cleaned/Jordan STEPS 2019/Jordan_lipid_clean 2022.07.12.dta", nolabel force
	
	* Kuwait STEPS 2015
	append using "Analysis/Cleaning/Self cleaned/Kuwait STEPS 2015/Kuwait_lipid_clean 2022.07.12.dta", nolabel force
	
	* Malawi STEPS 2017
	append using "Analysis/Cleaning/Self cleaned/Malawi STEPS 2017/Malawi_lipid_clean 2023.01.04.dta", nolabel force
	
	* Mongolia 2019
	append using "Analysis/Cleaning/Self cleaned/Mongolia 2019/Mongolia_lipid_clean 2022.04.03.dta", nolabel force
	
	* Mozambique 2014
	append using "Analysis/Cleaning/Self cleaned/Mozambique STEPS 2015/Mozambique_lipid_clean 2023.03.13.dta", nolabel force
	
	* Nauru 2015
	append using "Analysis/Cleaning/Self cleaned/Nauru STEPS 2015/Nauru_lipid_clean 2022.07.12.dta", nolabel force
	
	* Turkmenistan STEPS 2018
	append using "Analysis/Cleaning/Self cleaned/Turkmenistan STEPS 2018/Turkmenistan_lipid_clean 2023.01.04.dta", nolabel force
			
	* Sao Tome (RA cleaning currently)
	append using "Analysis/Cleaning/Self cleaned/Sao Tome STEPS 2019/SaoTome_lipid_clean 2023.01.09.dta", nolabel

	* USA NHANES 2015-19 
	append using "Analysis/Cleaning/Self cleaned/USA NHANES 2015-19/usa_aspirin_clean 2023.02.22.dta", nolabel

	
* Take a look at countries included and survey year
bysort country: gen country_id=(_n)
list country year if country_id == 1

* David self cleaning for WHO IDF analysis -------------------------------------
	
	frame create string
	frame change string

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/CookIslands_lipid_clean 2022.03.08.dta", clear 
	tostring psu, replace
	tostring stratum, replace
	tostring p_id, force replace
	drop stratum_num
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "cook islands ${date}.dta", replace		

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/Fiji_lipid_clean 2022.03.08.dta", clear 
	tostring psu, replace
	tostring stratum, replace
	tostring p_id, force replace
	drop stratum_num
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "fiji ${date}.dta", replace		

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/FrenchPolynesia_lipid_clean 2022.03.08.dta", clear 
	tostring psu, replace
	tostring stratum, replace
	tostring p_id, force replace
	drop stratum_num
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "french polynesia ${date}.dta", replace		

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/Niue_lipid_clean 2022.03.08.dta", clear 
	tostring psu, replace
	tostring stratum, replace
	tostring p_id, force replace
	drop stratum_num
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "niue ${date}.dta", replace	

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/Palau_lipid_clean 2022.04.20.dta", clear 
	tostring p_id, force replace
	gen w3 = 1 // no sampling weights given, assume equal weights for all respondents. This is reasonabel given no PSU or stratum
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "palau ${date}.dta", replace	

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/Qatar_lipid_clean 2022.03.08.dta", clear 
	tostring psu, replace
	tostring stratum, replace
	tostring p_id, force replace
	drop stratum_num
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "qatar ${date}.dta", replace	

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/TrinidadTobago_lipid_clean 2022.03.08.dta", clear 
	tostring psu, replace
	tostring stratum, replace
	tostring p_id, force replace
	drop stratum_num
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "trinidad ${date}.dta", replace	

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/MCDTR project/Cleaning files/Cleaned data and code/Uruguay_lipid_clean 2022.03.08.dta", clear 
	tostring p_id, force replace
	gen w3 = w_all // no w3 weight
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "uruguay ${date}.dta", replace	

	use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Statin paper/Temporary datasets/pooled_new_surveys - 2022.04.15.dta", clear
	keep if inlist(country,"El Salvador")
	tostring psu, replace
	tostring stratum, replace
	tostring p_id, force replace
	drop stratum_num
	* save dataset	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	save "el salvador ${date}.dta", replace	

	frame change default
	frame drop string

	* Mexico ENSANUT
	append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Aspirin paper/Analysis/Cleaning/Self cleaned/Mexico ENSANUT 2018/Mexico_ENSANUT_aspirin_clean - 2022.07.12.dta", nolabel 
	
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
	* Cook Islands	
	append using "cook islands ${date}.dta", nolabel

	* Fiji
	append using "fiji ${date}.dta", nolabel		

	* French Polynesia
	append using "french polynesia ${date}.dta", nolabel

	* Niue
	append using "niue ${date}.dta", nolabel

	* Palau
	append using "palau ${date}.dta", nolabel

	* Qatar
	append using "qatar ${date}.dta", nolabel

	* Trinidad
	append using "trinidad ${date}.dta", nolabel

	* Uruguay
	append using "uruguay ${date}.dta", nolabel

* Merging in NHANES III
append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Harmonized dataset/NHANES III harmonized 2023.03.08.dta"

	* Changing up some of the NHANES III code
	replace fbg_new = fbg if country == "NHANES III"
	replace fast_new = fast if country == "NHANES III"	
	
* Renaming countries
	tab country, m
	replace country = "Eswatini" if country == "Swaziland"
	replace country = "Sao Tome and Principe" if country == "SaoTome"
	replace country = "Trinidad and Tobago" if country == "Trinidad & Tobago"
	replace country = "Mexico" if country == "Mexico ENSANUT"	

keep    ///
	country /// harmonization variables
	countryGDPclass /// country variables
	p_id stratum stratum_num year psu psu_num psu year countryGDPclass svy /// survey design variables
	w1 w3 /// weight variables 
	age sex pregnant /// demographic
	clin_dia hbg_new /// diabetes health services
		fbg_new fast_new hba1c_p fbg2 /// diabetes biomarkers
	bmi // 
	
* Encoded countries	
encode country, gen(country_encoded)
sort country_encoded
tab country_encoded
distinct country_encoded
scalar n_countries = r(ndistinct)

* Generate a country_id variable (i.e., an id within each country)
tab country	
bysort country: gen country_id=(_n) 
label variable country_id "Within country participant number"
sort country
list country year if country_id == 1 // looking at country, svy type, and year in a nice list

* Clearning the World Bank income group variable
list country year countryGDPclass if country_id == 1
replace countryGDPclass	= 4 if country == "Cook Islands" // not a World Bank member, so using GDP estimates
replace countryGDPclass = 4 if country == "England"
replace countryGDPclass	= 3 if country == "Fiji"
replace countryGDPclass	= 4 if country == "French Polynesia"
replace countryGDPclass = 2 if country == "Mongolia"
replace countryGDPclass = 4 if country == "Nauru"
replace countryGDPclass = 2 if country == "Nepal"
replace countryGDPclass	= 4 if country == "Niue" // this needs to be double checked
replace countryGDPclass	= 3 if country == "Palau"
replace countryGDPclass	= 4 if country == "Qatar"
replace countryGDPclass = 2 if country == "Sao Tome and Principe"
replace countryGDPclass	= 4 if country == "Trinidad and Tobago"
replace countryGDPclass	= 3 if country == "Uruguay"
replace countryGDPclass = 4 if country == "United States"
label define GBDclass 1 "Low" 2 "Lower middle" 3 "Upper middle" 4 "High", modify
label values countryGDPclass GBDclass


* FIgure out World Bank income groups
gen countryGDPclass_string = ""
replace countryGDPclass_string = "LIC" if countryGDPclass == 1
replace countryGDPclass_string = "LMIC" if countryGDPclass == 2
replace countryGDPclass_string = "UMIC" if countryGDPclass == 3
replace countryGDPclass_string = "HIC" if countryGDPclass == 4
sort countryGDPclass
list country year countryGDPclass countryGDPclass_string if country_id == 1

* Replacing survey year to year of data collection after meticulous check:
clonevar survey_year = year
replace survey_year = "2016-17" if country == "Algeria"
replace survey_year = "2015-16" if country == "Nauru"
replace survey_year = "2015-18" if country == "United States"
list country survey_year if country_id == 1

* Renaming surveys
replace svy = "ENS" if country == "Chile"
sort country
list country svy if country_id == 1

* Save country file for merging later
preserve
keep if country_id == 1
keep country countryGDPclass
save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/countryGDPclass $date.dta", replace
restore
	
* Clean missingness

	/* See rows 517-520 of codebook v3 for the following:
	
	666666666 = variable not included in the dataset
	777777777 = subject did not know
	888888888 = subject refused to answer
	999999999 = missing due to skip pattern 	
	
	I make the decision to update the codebook as follows:
	
	.n = variable not in dataset
	.m = true missing
	0 = skipped	*/	
				
	foreach v of varlist pregnant hbg_new fast_new  {
		replace `v' = . if inlist(`v',333333333)  	// 
		replace `v' = . if inlist(`v',444444444)  	// 
		replace `v' = . if inlist(`v',555555555)  	// 555555555 = not eligible
		replace `v' = . if inlist(`v',666666666, 666666688)  	// 666666666 = variable not in the dataset coded as .
		replace `v' = 0 if inlist(`v',777777777,77,777777792,.d)  			// 777777777 = subject did not know
		replace `v' = . if inlist(`v',8,855555554.7,855555571.2,888888888,888888896,888888896,.r)		// 888888888 = subject refused to answer
		replace `v' = 0 if inlist(`v',977777785.6,977777776.8,999999999,1000000000)  	// 999999999 = missing as skip pattern recoded as 0	
	}			
	
	foreach v of varlist fbg_new  {
		replace `v' = . if inlist(`v',555555555)  	// 555555555 = not eligible
		replace `v' = . if inlist(`v',666666666, 666666688)  	// 666666666 = variable not in the dataset coded as .
		replace `v' = . if inlist(`v',777777777,77,777777792,.d)  			// 777777777 = subject did not know
		replace `v' = . if inlist(`v',855555554.7,855555571.2,888888888,888888896,888888896,.r)		// 888888888 = subject refused to answer
		replace `v' = . if inlist(`v',977777776.8,9.78e+08,999999999)		// 999999999 = missing as skip pattern recoded as .
	}	
	replace fbg_new = . if fbg_new >= 97777777 // couldn't get this one in above
	

	* Relevant variables to investigate
	tab country pregnant, m
	tab country hbg_new, m
	tab country fast_new, m
	recode fast_new (1=0) (0=1) if country == "Tajikistan"
	bysort country: sum fbg_new

* Clean outliers
sum fbg_new, d
replace fbg_new = . if fbg_new <2.2 | fbg_new >33.3
sum fbg_new, d

* Assessing for duplicate values
gen id = _n

/*******************************************************************************
Update PSU and stratum variables

This is to ensure these survey variables from the appended files are consistent
with the way they were coded in native HPACC PSU variables are correct
*******************************************************************************/
	
* psu
	
	* sort psu_num
	* browse country psu_num
	/* 	The highest psu_num is 842,013,740 in Tokelau, so using values greater
		than 1,000,000,000 for appended files will ensure uniqueness.	*/
	
	sort country
	list country country_encoded psu psu_num stratum stratum_num if inlist(country_id,1,25,100)
	
	gen psu_abc = ""
	replace psu_abc="AAA"+psu if country=="Afghanistan" 
	replace psu_abc="AAB"+psu if country=="Armenia"
	replace psu_abc="AAC"+psu if country=="Bermuda"
	replace psu_abc="AAD"+psu if country=="Brunei"
	replace psu_abc="AAE"+psu if country=="Cabo Verde"
	replace psu_abc="AAF"+psu if country=="Cook Islands"
	replace psu_abc="AAG"+psu if country=="Czech Republic"
	replace psu_abc="AAH"+psu if country=="El Salvador"
	replace psu_abc="AAI"+psu if country=="Ethiopia"
	replace psu_abc="AAJ"+psu if country=="Fiji"
	replace psu_abc="AAK"+psu if country=="French Polynesia"
	replace psu_abc="AAL"+psu if country=="Jordan"
	replace psu_abc="AAM"+psu if country=="Kuwait"
	replace psu_abc="AAN"+psu if country=="Malawi"
	replace psu_abc="AAO"+psu if country=="Mongolia"
	replace psu_abc="AAP"+psu if country=="Mozambique"
	replace psu_abc="AAQ"+psu if country=="Nauru"
	replace psu_abc="AAR"+psu if country=="Niue"
	replace psu_abc="AAS"+psu if country=="Sao Tome and Principe"
	replace psu_abc="AAT"+psu if country=="Palau"
	replace psu_abc="AAU"+psu if country=="Qatar"
	replace psu_abc="AAV"+psu if country=="Trinidad and Tobago"
	replace psu_abc="AAW"+psu if country=="Turkmenistan"
	replace psu_abc="AAX"+psu if country=="United States"
	replace psu_abc="AAY"+psu if country=="Uruguay"
	replace psu_abc="AAZ"+psu if country=="Romania"
	replace psu_abc="ABA"+psu if country=="Seychelles"
			
	encode psu_abc, gen(psu2)
	replace psu_num=1001000000+psu2 if country=="Afghanistan"
	replace psu_num=1002000000+psu2 if country=="Armenia"
	replace psu_num=1003000000+psu2 if country=="Bermuda"
	replace psu_num=1004000000+psu2 if country=="Brunei"
	replace psu_num=1007000000+psu2 if country=="Cabo Verde"
	replace psu_num=1008000000+psu2 if country=="Cook Islands"
	replace psu_num=1008000000+psu2 if country=="Czech Republic"
	replace psu_num=1009000000+psu2 if country=="El Salvador"
	replace psu_num=1010000000+psu2 if country=="Ethiopia"
	replace psu_num=1011000000+psu2 if country=="Fiji"
	replace psu_num=1012000000+psu2 if country=="French Polynesia"
	replace psu_num=1013000000+psu2 if country=="Jordan"
	replace psu_num=1014000000+psu2 if country=="Kuwait"
	replace psu_num=1015000000+psu2 if country=="Malawi"
	replace psu_num=1016000000+psu2 if country=="Mongolia"
	replace psu_num=1017000000+psu2 if country=="Mozambique"
	replace psu_num=1018000000+psu2 if country=="Nauru"
	replace psu_num=1019000000+psu2 if country=="Nieu"
	replace psu_num=1020000000+psu2 if country=="Sao Tome and Principe"
	replace psu_num=1021000000+psu2 if country=="Palau"
	replace psu_num=1022000000+psu2 if country=="Qatar"
	replace psu_num=1023000000+psu2 if country=="Trinidad and Tobago"
	replace psu_num=1024000000+psu2 if country=="Turkmenistan"
	replace psu_num=1025000000+psu2 if country=="United States"
	replace psu_num=1026000000+psu2 if country=="Uruguay"
	replace psu_num=1027000000+psu2 if country=="Romania"
	replace psu_num=1028000000+psu2 if country=="Seychelles"
	
	list country country_encoded psu_num if inlist(country_id,1,25,100)
		
* stratum
	
	* sort stratum_num
	* browse country stratum_num
	/* 	The highest psu_num is 8,420,208 in Tokelau, so using values greater
		than 10,000,000 for appended files will ensure uniqueness.	*/
	
	sort country
	list country country_encoded stratum stratum_num if inlist(country_id,1,25,100)
	
	gen stratum_abc = ""
	replace stratum_abc="AAA"+stratum if country=="Afghanistan" 
	replace stratum_abc="AAB"+stratum if country=="Armenia"
	replace stratum_abc="AAC"+stratum if country=="Bermuda"
	replace stratum_abc="AAD"+stratum if country=="Brunei"
	replace stratum_abc="AAE"+stratum if country=="Cabo Verde"
	replace stratum_abc="AAF"+stratum if country=="Cook Islands"
	replace stratum_abc="AAG"+stratum if country=="Czech Republic"
	replace stratum_abc="AAH"+stratum if country=="El Salvador"
	replace stratum_abc="AAI"+stratum if country=="Ethiopia"
	replace stratum_abc="AAJ"+stratum if country=="Fiji"
	replace stratum_abc="AAK"+stratum if country=="French Polynesia"
	replace stratum_abc="AAL"+stratum if country=="Jordan"
	replace stratum_abc="AAM"+stratum if country=="Kuwait"
	replace stratum_abc="AAN"+stratum if country=="Malawi"
	replace stratum_abc="AAO"+stratum if country=="Mongolia"
	replace stratum_abc="AAP"+stratum if country=="Mozambique"
	replace stratum_abc="AAQ"+stratum if country=="Nauru"
	replace stratum_abc="AAR"+stratum if country=="Niue"
	replace stratum_abc="AAS"+stratum if country=="Sao Tome and Principe"
	replace stratum_abc="AAT"+stratum if country=="Palau"
	replace stratum_abc="AAU"+stratum if country=="Qatar"
	replace stratum_abc="AAV"+stratum if country=="Trinidad and Tobago"
	replace stratum_abc="AAW"+stratum if country=="Turkmenistan"
	replace stratum_abc="AAX"+stratum if country=="United States"
	replace stratum_abc="AAY"+stratum if country=="Uruguay"
	replace stratum_abc="AAZ"+stratum if country=="Romania"
	replace stratum_abc="ABA"+stratum if country=="Seychelles"
	
	encode stratum_abc, gen(stratum2)
	replace stratum_num=1001000000+stratum2 if country=="Afghanistan"
	replace stratum_num=1002000000+stratum2 if country=="Armenia"
	replace stratum_num=1003000000+stratum2 if country=="Bermuda"
	replace stratum_num=1004000000+stratum2 if country=="Brunei"
	replace stratum_num=1007000000+stratum2 if country=="Cabo Verde"
	replace stratum_num=1008000000+stratum2 if country=="Cook Islands"
	replace stratum_num=1008000000+stratum2 if country=="Czech Republic"
	replace stratum_num=1009000000+stratum2 if country=="El Salvador"
	replace stratum_num=1010000000+stratum2 if country=="Ethiopia"
	replace stratum_num=1011000000+stratum2 if country=="Fiji"
	replace stratum_num=1012000000+stratum2 if country=="French Polynesia"
	replace stratum_num=1013000000+stratum2 if country=="Jordan"
	replace stratum_num=1014000000+stratum2 if country=="Kuwait"
	replace stratum_num=1015000000+stratum2 if country=="Malawi"
	replace stratum_num=1016000000+stratum2 if country=="Mongolia"
	replace stratum_num=1017000000+stratum2 if country=="Mozambique"
	replace stratum_num=1018000000+stratum2 if country=="Nauru"
	replace stratum_num=1019000000+stratum2 if country=="Nieu"
	replace stratum_num=1020000000+stratum2 if country=="Sao Tome and Principe"
	replace stratum_num=1021000000+stratum2 if country=="Palau"
	replace stratum_num=1022000000+stratum2 if country=="Qatar"
	replace stratum_num=1023000000+stratum2 if country=="Trinidad and Tobago"
	replace stratum_num=1024000000+stratum2 if country=="Turkmenistan"
	replace stratum_num=1025000000+stratum2 if country=="United States"
	replace stratum_num=1026000000+stratum2 if country=="Uruguay"
	replace stratum_num=1027000000+stratum2 if country=="Romania"
	replace stratum_num=1028000000+stratum2 if country=="Seychelles"
	
	list country country_encoded stratum_num if inlist(country_id,1,25,100)
 
/******************************************************************************
Prepare for age standardization
*******************************************************************************/
// 1. Prepare age groups
gen age_group = ""
replace age_group = "0-4" if inrange(age,0,4.9999)
replace age_group = "5-9" if inrange(age,5,9.9999)
replace age_group = "10-14" if inrange(age,10,14.9999)
replace age_group = "15-19" if inrange(age,15,19.9999)
replace age_group = "20-24" if inrange(age,20,24.9999)
replace age_group = "25-29" if inrange(age,25,29.9999)
replace age_group = "30-34" if inrange(age,30,34.9999)
replace age_group = "35-39" if inrange(age,35,39.9999)
replace age_group = "40-44" if inrange(age,40,44.9999)
replace age_group = "45-49" if inrange(age,45,49.9999)
replace age_group = "50-54" if inrange(age,50,54.9999)
replace age_group = "55-59" if inrange(age,55,59.9999)
replace age_group = "60-64" if inrange(age,60,64.9999)
replace age_group = "65-69" if inrange(age,65,69.9999)
replace age_group = "70-74" if inrange(age,70,74.9999)
replace age_group = "75-79" if inrange(age,75,79.9999)
replace age_group = "80-84" if inrange(age,80,84.9999)
replace age_group = "85-89" if inrange(age,85,89.9999)
replace age_group = "90-94" if inrange(age,90,94.9999)
replace age_group = "95-99" if inrange(age,95,99.9999)
replace age_group = "100+" if age>=100
	
// 2. merge WHO standard population data with working database, by age_group
// delete the _merge variable, which is leftover from a previous merge

merge m:1 age_group using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/External data/World standard population/WHO standard population.dta"

// All matched:
//
//     Result                      Number of obs
//     -----------------------------------------
//     Not matched                             0
//     Matched                           257,270  (_merge==3)
//     -----------------------------------------

list country age age_group if _merge == 1 
drop _merge

/******************************************************************************
Outcomes, weights, and survey set
*******************************************************************************/

* sample
gen sample = 1 if ///
	(fast_new == 1 | country == "Costa Rica") & /// no fasting variable in Costa Rica, assume all fast
	pregnant != 1 & ///
	inrange(age,18,110) // 

* weqw weights
bys country: egen sum_weq_sample_reg=sum(w3) if !missing(w3) // for subpop estimates, need all w3 weights		
gen weqnr_sample=w3/sum_weq_sample

* svyset
svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)

* Unadjusted overall prevalence definition
gen prev_dm_sr_fbg = .
replace prev_dm_sr_fbg = 0 if !missing(fbg_new) & !missing(hbg_new)
replace prev_dm_sr_fbg = 1 if ///
	hbg_new == 1 | ///
	inrange(fbg_new,7.0,34)
replace prev_dm_sr_fbg = . if missing(fbg_new) | missing(hbg_new)
bysort country: tab prev_dm_sr_fbg
tab prev_dm_sr_fbg

* % diagnosed definition
gen diag_nomi = .
replace diag_nomi = 0 if hbg_new !=. & prev_dm_sr_fbg == 1
replace diag_nomi = 1 if hbg_new == 1 & prev_dm_sr_fbg == 1

/******************************************************************************
Unadjusted estimates
*******************************************************************************/

* Unadjusted overall prevalence
svy, subpop(sample): proportion prev_dm_sr_fbg
estimates store weq_ov_prev_dm_sr_fbg
matrix weq_ov_prev_dm_sr_fbg = r(table)

svy, subpop(sample): proportion prev_dm_sr_fbg, over(country_encoded) 
estimates store weq_c_prev_dm_sr_fbg
matrix weq_c_prev_dm_sr_fbg = r(table)

* Unadjusted % diagnosed
svy, subpop(sample): proportion diag_nomi
estimates store weq_ov_diag_nomi
matrix weq_ov_diag_nomi = r(table)

svy, subpop(sample): proportion diag_nomi, over(country_encoded) 
estimates store weq_c_diag_nomi
matrix weq_c_diag_nomi = r(table)

/******************************************************************************
Checking the imputation model
*******************************************************************************/

* Splines for continuous variables		
mkspline2 fbg_new_spline = fbg_new if !missing(fbg_new), cubic nknots(5) displayknots	

* Check out imputation model
regress fbg2 fbg_new_spline*
* adjustrcspline, addplot(scatter fbg2 fbg_new, msymbol(Oh))

/******************************************************************************
Imputation
*******************************************************************************/

* Set up mi --------------------------------------------------------------------	
mi set wide
mi xtset, clear	
mi register imputed fbg2
mi register regular fbg_new
set seed 12345

foreach d of numlist 3 5 10 {

preserve

* Imputation regression --------------------------------------------------------
mi impute pmm fbg2  ///
	fbg_new ///
	if fbg_new != . & sample == 1 ///
, add(100) knn(`d') replace force

* Check results of fbg2
sum fbg_new if sample == 1, d // unadjusted fbg_new
// mi xeq 0 1 2: sum fbg2 if sample == 1, d // mi adjusted
		
* Drop NHANES
// drop if country == "NHANES III"		
		
* MI outcomes ------------------------------------------------------------------
		
* Generate adjusted overall prevalence variable
mi passive: gen byte prev_dm_sr_fbg_2 = .
mi passive: replace prev_dm_sr_fbg_2 = 0 if ///
	!missing(fbg_new) & !missing(fbg2) & !missing(hbg_new)
mi passive: replace prev_dm_sr_fbg_2 = 1 if /// 
	hbg_new == 1 | ///
	(inrange(fbg_new,7.0,34) & inrange(fbg2,7.0,34)) 
mi passive: replace prev_dm_sr_fbg_2 = . if missing(fbg_new) | missing(fbg2) | missing(hbg_new)	
// mi xeq 0 1 2: bysort country: tab prev_dm_sr_fbg_2 // mi adjusted
// mi xeq 0 1 2: tab prev_dm_sr_fbg_2 // mi adjusted

* Generate adjusted % diagnosed variable
mi passive: gen diag_mi = .
mi passive: replace diag_mi = 0 if hbg_new !=. & prev_dm_sr_fbg_2 == 1
mi passive: replace diag_mi = 1 if hbg_new == 1 & prev_dm_sr_fbg_2 == 1

* MI estimations ---------------------------------------------------------------

* Set svyset mi
mi svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)

* Adjusted prevalence
mi estimate, esampvaryok: svy, subpop(sample): proportion prev_dm_sr_fbg_2
estimates store weq_ov_prev_dm_sr_fbg_2_`d'
matrix weq_ov_prev_dm_sr_fbg_2_`d' = r(table)

mi estimate, esampvaryok: svy, subpop(sample): proportion prev_dm_sr_fbg_2, over(country_encoded) 
estimates store weq_c_prev_dm_sr_fbg_2_`d'
matrix weq_c_prev_dm_sr_fbg_2_`d' = r(table)

* Adjusted % diagnosed
mi estimate, esampvaryok: svy, subpop(sample): proportion diag_mi
estimates store weq_ov_diag_mi_`d'
matrix weq_ov_diag_mi_`d' = r(table)

mi estimate, esampvaryok: svy, subpop(sample): proportion diag_mi, over(country_encoded) 
estimates store weq_c_diag_mi_`d'
matrix weq_c_diag_mi_`d' = r(table)

restore
	
/******************************************************************************
Overall prevalence results
*******************************************************************************/
			
frame create temp_frame
frame change temp_frame

* This step imports and saves the UNADJUSTED estimates -------------------------
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Putexcel files"
putexcel set "weq_c_prev_dm_sr_fbg ${date}.xlsx", replace 

estimates restore weq_c_prev_dm_sr_fbg //  Restoring estimates	
estimates replay weq_c_prev_dm_sr_fbg, baselevels

putexcel (A1) = etable	// exporting the whole regression table

import excel "weq_c_prev_dm_sr_fbg ${date}.xlsx", sheet("Sheet1") cellrange(A3) firstrow clear

rename prev_dm_sr_fbgcountry_encoded country
gen label = "unadjusted"
destring C, replace force
destring D, replace
destring E, replace

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg $date.dta", replace

* This step imports and saves the ADJUSTED estimates ----------------------------
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Putexcel files"
putexcel set "weq_c_prev_dm_sr_fbg_2_`d' ${date}.xlsx", replace 

estimates restore weq_c_prev_dm_sr_fbg_2_`d' //  Restoring estimates	
estimates replay weq_c_prev_dm_sr_fbg_2_`d', baselevels

putexcel (A1) = etable	// exporting the whole regression table

import excel "weq_c_prev_dm_sr_fbg_2_`d' ${date}.xlsx", sheet("Sheet1") cellrange(A3) firstrow clear

rename prev_dm_sr_fbg_2country_encoded country
gen label = "adjusted"
destring C, replace force
destring D, replace
destring E, replace

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg_2_`d' $date.dta", replace

* Do some analysis and make graphs ---------------------------------------------

append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg $date.dta"

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg appended $date.dta", replace

rename (country B C D E) (country estimate st_err lci uci)
drop st_err	
drop if substr(country,1,1) == "0"

replace country = substr(country,3,.)

drop if estimate == 1
destring lci, replace
destring uci, replace

replace estimate = 100*estimate
replace lci = 100*lci
replace uci = 100*uci

encode country, gen(country_encoded) // can do labmask later to get this correct
gen label_encoded = .
replace label_encoded = 1 if label == "unadjusted"
replace label_encoded = 2 if label == "adjusted"

* Suppressing based on NHANES criteria
gen abs_ci_width = uci-lci
gen rel_ci_width = abs_ci_width/est*100
replace estimate = . if ///
	abs_ci_width >= 30 | ///
	(inrange(abs_ci_width,5.0000001,29.999999) & rel_ci_width >130) | ///
	estimate == 0 | estimate == 100
	
drop country label 
gsort + country_encoded + label_encoded
reshape wide est lci uci abs_ci_width rel_ci_width, i(country_encoded) j(label_encoded)

* Generating delta
gen delta = estimate2 - estimate1
sum delta, d

* Generating ratio
gen ratio = estimate2 / estimate1 - 1
sum ratio, d

* Merge in countryGDPclass
decode country_encoded, gen(country)
merge 1:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/countryGDPclass $date.dta"
drop if inlist(_merge,1,2)
drop _merge

* median + IQRs
tabstat estimate1, statistics( median p25 p75 ) format(%9.1f)
tabstat estimate2, statistics( median p25 p75 ) format(%9.1f)
tabstat delta, statistics( median p25 p75 ) format(%9.1f)
tabstat ratio, statistics( median p25 p75 ) format(%9.3f)

* Simple regression and storing output
	regress estimate2 estimate1
	local r2: display %5.3f e(r2)
	  
	* Constant
	local cons "`: di  %3.2f abs(_b[_cons])'"
	di "`cons'"

	* Beta coefficient
	local beta "`: di  %5.3f _b[estimate1]'"
	di "`beta'"

	* Adding or subtract constant
	local add_or_subtract "`=cond(_b[_cons]>0, "+", "–")'"
	di "`add_or_subtract'"

	* Putting it together
	local eq `" y = `beta'x `add_or_subtract' `cons'"'  //  "x " `add_or_subtract' "'
	di "`eq'"

* Lancet colors
	local lancet_1 `" "0 70 139" "' // #00468BFF blue
	local lancet_2 `" "237 0 0" "' // #ED0000FF red
	local lancet_3 `" "66 181 64" "' // #42B540FF green
	local lancet_4 `" "0 153 180" "' // #0099B4FF teal 
	local lancet_5 `" "146 94 159" "' // #925E9FFF purple
	local lancet_6 `" "253 175 145" "' // #FDAF91FF peach
	local lancet_7 `" "173 0 42" "' // #AD002AFF maroon
	local lancet_8 `" "248 194 23" "' // #F8C217 grey -> convert to gold

* Local macros to control images
local l_text_size "2.5rs"	
local l_axis_title_size "3.0rs"
local l_lab_size "2.5rs"
local l_text_size_legend "2.5rs"	
local l_msize "1.25rs"
local l_width "0.2rs"

twoway 	function y=x ///
		, ///
			range(0 30) ///
			lcolor(black%100)  ///
			lwidth(`l_width')  ///
			lpattern(line) || ///
		scatter estimate2 estimate1 if countryGDPclass == 1	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_1'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 2	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_2'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 3	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_3'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 4	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_8'%50"') ///
			mlcolor(none) ///
			msize(`l_msize')  || ///
		lfit estimate2 estimate1 ///
		, ///
			/// range(0 30) ///
			lcolor(black%50)  ///
			lwidth(`l_width')  ///
			lpattern(shortdash) ///
		/// options
		ytitle("Measurement-adjusted prevalence (%)", margin(l=0 r=2rs b=0 t=0) size(`l_axis_title_size'))							///
		xtitle("Measurement-unadjusted prevalence (%)", margin(l=0 r=0 b=0 t=2rs) size(`l_axis_title_size')) 	///
		yscale(ra(0 30)) ylabel(0(5)30, labsize(`l_lab_size'))   ///
		xscale(ra(0 30)) xlabel(0(5)30, labsize(`l_lab_size'))   ///
		/// legend
			legend(order(2 3 4 5) ///
					label(2 "Low-income countries") ///
					label(3 "Lower-middle-income countries") ///
					label(4 "Upper-middle-income countries") ///
					label(5 "High-income countries") ///
				rows(2) size(`l_text_size_legend') ring(1) position(6) region(color(none) lwidth(none)) ///
				bmargin(l=0 r=0 b=0 t=0) colgap(2rs) rowgap(1rs) keygap(1rs) region(lcolor(black))) ///
		xsize(6)   ///
		ysize(6)   ///
		graphregion(color(none) lcolor(none) margin(l=2rs r=5rs b=2rs t=5rs)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		caption("`eq', R{superscript:2}=`r2'",ring(0) pos(11) margin(l=2rs r=0rs t=2rs b=0rs) size(`l_axis_title_size') ) ///
		name(prevalence_`d', replace) //

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Figures"
graph save "prevalence_`d' ${date}.gph", replace         
graph export "prevalence_`d' ${date}.pdf", as(pdf) name(prevalence_`d') replace
graph export "prevalence_`d' ${date}.png", as(png) name(prevalence_`d') replace

		
frame change default
frame drop temp_frame

/******************************************************************************
% diagnosed results
*******************************************************************************/

frame create temp_frame
frame change temp_frame	

* This step imports and saves the UNADJUSTED estimates -------------------------
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Putexcel files"
putexcel set "weq_c_diag_nomi ${date}.xlsx", replace 

estimates restore weq_c_diag_nomi //  Restoring estimates	
estimates replay weq_c_diag_nomi, baselevels

putexcel (A1) = etable	// exporting the whole regression table

import excel "weq_c_diag_nomi ${date}.xlsx", sheet("Sheet1") cellrange(A3) firstrow clear

rename diag_nomicountry_encoded country
gen label = "unadjusted"

destring C, replace force
destring D, replace
destring E, replace

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi $date.dta", replace

* This step imports and saves the ADJUSTED estimates ----------------------------
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Putexcel files"
putexcel set "weq_c_diag_mi_`d' ${date}.xlsx", replace 

estimates restore weq_c_diag_mi_`d' //  Restoring estimates	
estimates replay weq_c_diag_mi_`d', baselevels

putexcel (A1) = etable	// exporting the whole regression table

import excel "weq_c_diag_mi_`d' ${date}.xlsx", sheet("Sheet1") cellrange(A3) firstrow clear

rename diag_micountry_encoded country
gen label = "adjusted"

destring C, replace force
destring D, replace
destring E, replace

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_mi_`d' $date.dta", replace

* Do some analysis and make graphs ---------------------------------------------

append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi $date.dta"

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi appended $date.dta", replace

rename (country B C D E) (country estimate st_err lci uci)
drop st_err	
drop if substr(country,1,1) == "0"

replace country = substr(country,3,.)

drop if estimate == 1
destring lci, replace
destring uci, replace

replace estimate = 100*estimate
replace lci = 100*lci
replace uci = 100*uci

encode country, gen(country_encoded) // can do labmask later to get this correct
gen label_encoded = .
replace label_encoded = 1 if label == "unadjusted"
replace label_encoded = 2 if label == "adjusted"

* Suppressing based on NHANES criteria
gen abs_ci_width = uci-lci
gen rel_ci_width = abs_ci_width/est*100
replace estimate = . if ///
	abs_ci_width >= 30 | ///
	(inrange(abs_ci_width,5.0000001,29.999999) & rel_ci_width >130) | ///
	estimate == 0 | estimate == 100
	
drop country label 
gsort + country_encoded + label_encoded
reshape wide est lci uci abs_ci_width rel_ci_width, i(country_encoded) j(label_encoded)

* Generating delta
gen delta = estimate2 - estimate1
sum delta, d

* Generating ratio
gen ratio = estimate2 / estimate1 - 1
sum ratio, d

* Merge in countryGDPclass
decode country_encoded, gen(country)
merge 1:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/countryGDPclass $date.dta"
drop if inlist(_merge,1,2)
drop _merge

* median + IQRs
tabstat estimate1, statistics( median p25 p75 ) format(%9.1f)
tabstat estimate2, statistics( median p25 p75 ) format(%9.1f)
tabstat delta, statistics( median p25 p75 ) format(%9.1f)
tabstat ratio, statistics( median p25 p75 ) format(%9.3f)

* Simple regression and storing output
	regress estimate2 estimate1
	local r2: display %5.3f e(r2)
	  
	* Constant
	local cons "`: di  %3.2f abs(_b[_cons])'"
	di "`cons'"

	* Beta coefficient
	local beta "`: di  %5.3f _b[estimate1]'"
	di "`beta'"

	* Adding or subtract constant
	local add_or_subtract "`=cond(_b[_cons]>0, "+", "–")'"
	di "`add_or_subtract'"

	* Putting it together
	local eq `" y = `beta'x `add_or_subtract' `cons'"'  //  "x " `add_or_subtract' "'
	di "`eq'"

* Lancet colors
	local lancet_1 `" "0 70 139" "' // #00468BFF blue
	local lancet_2 `" "237 0 0" "' // #ED0000FF red
	local lancet_3 `" "66 181 64" "' // #42B540FF green
	local lancet_4 `" "0 153 180" "' // #0099B4FF teal 
	local lancet_5 `" "146 94 159" "' // #925E9FFF purple
	local lancet_6 `" "253 175 145" "' // #FDAF91FF peach
	local lancet_7 `" "173 0 42" "' // #AD002AFF maroon
	local lancet_8 `" "248 194 23" "' // #F8C217 grey -> convert to gold

* Local macros to control images
local l_text_size "2.5rs"	
local l_axis_title_size "3.0rs"
local l_lab_size "2.5rs"
local l_text_size_legend "2.5rs"	
local l_msize "1.25rs"
local l_width "0.2rs"

twoway 	function y=x ///
		, ///
			range(0 100) ///
			lcolor(black%100)  ///
			lwidth(`l_width')  ///
			lpattern(line) || ///
		scatter estimate2 estimate1 if countryGDPclass == 1	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_1'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 2	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_2'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 3	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_3'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 4	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_8'%50"') ///
			mlcolor(none) ///
			msize(`l_msize')  || ///
		lfit estimate2 estimate1 ///
		, ///
			/// range(0 30) ///
			lcolor(black%50)  ///
			lwidth(`l_width')  ///
			lpattern(shortdash) ///
		/// options
		ytitle("Measurement-adjusted % diagnosed", margin(l=0 r=2rs b=0 t=0) size(`l_axis_title_size'))							///
		xtitle("Measurement-unadjusted % diagnosed", margin(l=0 r=0 b=0 t=2rs) size(`l_axis_title_size')) 	///
		yscale(ra(0 100)) ylabel(0(20)100, labsize(`l_lab_size'))   ///
		xscale(ra(0 100)) xlabel(0(20)100, labsize(`l_lab_size'))   ///
		/// legend
			legend(order(2 3 4 5) ///
					label(2 "Low-income countries") ///
					label(3 "Lower-middle-income countries") ///
					label(4 "Upper-middle-income countries") ///
					label(5 "High-income countries") ///
				rows(2) size(`l_text_size_legend') ring(1) position(6) region(color(none) lwidth(none)) ///
				bmargin(l=0 r=0 b=0 t=0) colgap(2rs) rowgap(1rs) keygap(1rs) region(lcolor(black))) ///
		xsize(6)   ///
		ysize(6)   ///
		graphregion(color(none) lcolor(none) margin(l=2rs r=5rs b=2rs t=5rs)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		caption("`eq', R{superscript:2}=`r2'",ring(0) pos(11) margin(l=2rs r=0rs t=2rs b=0rs) size(`l_axis_title_size') ) ///
		name(diagnosed_`d', replace) //		

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Figures"
graph save "diagnosed_`d' ${date}.gph", replace         
graph export "diagnosed_`d' ${date}.pdf", as(pdf) name(diagnosed_`d') replace
graph export "diagnosed_`d' ${date}.png", as(png) name(diagnosed_`d') replace

frame change default
frame drop temp_frame

}

/*******************************************************************************
********************************************************************************

Table of survey characteristics

*******************************************************************************
*******************************************************************************/

preserve
drop if country == "NHANES III"
drop country_encoded
label drop country_encoded
encode country, gen(country_encoded)
sort country_encoded
tab country_encoded
distinct country_encoded
scalar n_countries = r(ndistinct)

sort country_encoded
distinct country_encoded
scalar n_countries = r(ndistinct)

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/"
putexcel set "Putexcel files/Table survey characteristics ${date}.xlsx", replace 
		   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "Income group"
putexcel C1 = "Survey name"
putexcel D1 = "Year"
putexcel E1 = "Sample size"

* Macro for bottom row
scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
sort country_id country 

* Column: Country --------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel A`row' = country[`n']
}
* Column: Income group ---------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel B`row' = countryGDPclass_string[`n']
}

* Column: Survey name ----------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel C`row' = svy[`n']
}

* Column: Year -----------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel D`row' = survey_year[`n']
}

* Column: Sample size ----------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
count if inlist(sample,1) & country_encoded == `n'
local sample = r(N)
putexcel E`row' = `sample'
}
putexcel E2:F`=scalar(bottom_row)',nformat(number_sep)
	
* Summary row at bottom --------------------------------------------------------
	
* Overall total label
	putexcel A`=scalar(bottom_row)' = "Overall"
	
* sample
	tab sample, matcell(sample)
	matlist sample
	local sample = sample[1,1]
	dis `sample'
	putexcel E`=scalar(bottom_row)' = `sample',nformat(number_sep)
			
restore		

/*******************************************************************************
********************************************************************************
Estimates for appendix table 1
*******************************************************************************
*******************************************************************************/

frame create temp_frame
frame change temp_frame	

* Prevalence -------------------------------------------------------------------

use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg appended $date.dta", clear

rename (country B C D E) (country estimate st_err lci uci)
drop st_err	
drop if substr(country,1,1) == "0"

replace country = substr(country,3,.)

destring lci, replace
destring uci, replace

replace estimate = 100*estimate
replace lci = 100*lci
replace uci = 100*uci

encode country, gen(country_encoded) // can do labmask later to get this correct
gen label_encoded = .
replace label_encoded = 1 if label == "unadjusted"
replace label_encoded = 2 if label == "adjusted"

* Making string variables	
gen est_s = string(estimate,"%9.1f")
gen lci_s = string(lci,"%9.1f")
gen uci_s = string(uci,"%9.1f")
gen output = est_s + " (" + lci_s + "-" + uci_s + ")"

* Suppressing based on NHANES criteria
// gen abs_ci_width = uci-lci
// gen rel_ci_width = abs_ci_width/estimate*100
// replace output = "NR" if ///
// 	abs_ci_width >= 30 | ///
// 	(inrange(abs_ci_width,5.0000001,29.999999) & rel_ci_width >130)
	
drop country label 
gsort + country_encoded + label_encoded
reshape wide estimate lci uci output est_s lci_s uci_s, i(country_encoded) j(label_encoded)
keep country output1 output2

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/table 1 prevalence $date.dta", replace

* Percent of cases diagnosed ---------------------------------------------------

use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi appended $date.dta", clear

rename (country B C D E) (country estimate st_err lci uci)
drop st_err	
drop if substr(country,1,1) == "0"

replace country = substr(country,3,.)

destring lci, replace
destring uci, replace

replace estimate = 100*estimate
replace lci = 100*lci
replace uci = 100*uci

encode country, gen(country_encoded) // can do labmask later to get this correct
gen label_encoded = .
replace label_encoded = 1 if label == "unadjusted"
replace label_encoded = 2 if label == "adjusted"

* Making string variables	
gen est_s = string(estimate,"%9.1f")
gen lci_s = string(lci,"%9.1f")
gen uci_s = string(uci,"%9.1f")
gen output = est_s + " (" + lci_s + "-" + uci_s + ")"

* Suppressing based on NHANES criteria
// gen abs_ci_width = uci-lci
// gen rel_ci_width = abs_ci_width/estimate*100
// replace output = "NR" if ///
// 	abs_ci_width >= 30 | ///
// 	(inrange(abs_ci_width,5.0000001,29.999999) & rel_ci_width >130)
	
drop country label 
gsort + country_encoded + label_encoded
reshape wide estimate lci uci output est_s lci_s uci_s, i(country_encoded) j(label_encoded)
keep country output1 output2

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/table 1 diagnosed $date.dta", replace

frame change default
frame drop temp_frame

mi unset

/*******************************************************************************
********************************************************************************

Sensitivity analysis with local residual draws

Morris TP, White IR, Royston P. Tuning multiple imputation by predictive mean
matching and local residual draws. BMC Med Res Methodol 2014; 14: 75.

********************************************************************************
********************************************************************************/

set seed 12345

* Does the imputation using local residual draw method
foreach n of numlist 1(1)100 {
	uvis regress fbg2 fbg_new_spline* if fbg_new != . & sample == 1, match lrd matchtype(1) matchpool(5) gen(fbg2_lrd`n')  replace // 
// 	gen _mi`n' = _n
	}
	
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files"
save "lrd dataset $date", replace
mi import wide, imputed(fbg2=fbg2_lrd*)
mi describe
mi xtset, clear	

* MI outcomes ------------------------------------------------------------------
		
* Generate adjusted overall prevalence variable
mi passive: gen byte prev_dm_sr_fbg_2_lrd = .
mi passive: replace prev_dm_sr_fbg_2_lrd = 0 if ///
	!missing(fbg_new) & !missing(fbg2) & !missing(hbg_new)
mi passive: replace prev_dm_sr_fbg_2_lrd = 1 if /// 
	hbg_new == 1 | ///
	(inrange(fbg_new,7.0,34) & inrange(fbg2,7.0,34)) 
mi passive: replace prev_dm_sr_fbg_2_lrd = . if missing(fbg_new) | missing(fbg2) | missing(hbg_new)	
// mi xeq 0 1 2: bysort country: tab prev_dm_sr_fbg_2_lrd // mi adjusted
// mi xeq 0 1 2: tab prev_dm_sr_fbg_2_lrd // mi adjusted

* Generate adjusted % diagnosed variable
mi passive: gen diag_mi_lrd = .
mi passive: replace diag_mi_lrd = 0 if hbg_new !=. & prev_dm_sr_fbg_2_lrd == 1
mi passive: replace diag_mi_lrd = 1 if hbg_new == 1 & prev_dm_sr_fbg_2_lrd == 1

* MI estimations ---------------------------------------------------------------

* Set svyset mi
mi svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)

* Adjusted prevalence
mi estimate, esampvaryok: svy, subpop(sample): proportion prev_dm_sr_fbg_2_lrd
estimates store weq_ov_prev_dm_sr_fbg_2_lrd
matrix weq_ov_prev_dm_sr_fbg_2_lrd = r(table)

mi estimate, esampvaryok: svy, subpop(sample): proportion prev_dm_sr_fbg_2_lrd, over(country_encoded) 
estimates store weq_c_prev_dm_sr_fbg_2_lrd
matrix weq_c_prev_dm_sr_fbg_2_lrd = r(table)

* Adjusted % diagnosed
mi estimate, esampvaryok: svy, subpop(sample): proportion diag_mi_lrd
estimates store weq_ov_diag_mi_lrd
matrix weq_ov_diag_mi_lrd = r(table)

mi estimate, esampvaryok: svy, subpop(sample): proportion diag_mi_lrd, over(country_encoded) 
estimates store weq_c_diag_mi_lrd
matrix weq_c_diag_mi_lrd = r(table)

/******************************************************************************
Sensitivity analysis with local residual draws: Overall prevalence results
*******************************************************************************/
			
frame create temp_frame
frame change temp_frame

* This step imports and saves the UNADJUSTED estimates -------------------------
use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg $date.dta", clear

* This step imports and saves the ADJUSTED estimates ----------------------------
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Putexcel files"
putexcel set "weq_c_prev_dm_sr_fbg_2_lrd ${date}.xlsx", replace 

estimates restore weq_c_prev_dm_sr_fbg_2_lrd //  Restoring estimates	
estimates replay weq_c_prev_dm_sr_fbg_2_lrd, baselevels

putexcel (A1) = etable	// exporting the whole regression table

import excel "weq_c_prev_dm_sr_fbg_2_lrd ${date}.xlsx", sheet("Sheet1") cellrange(A3) firstrow clear

rename prev_dm_sr_fbg_2_lrdcountry_enc country
gen label = "adjusted"
destring C, replace force
destring D, replace
destring E, replace

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg_2_lrd $date.dta", replace

* Do some analysis and make graphs ---------------------------------------------

append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg $date.dta"

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg lrd appended $date.dta", replace

rename (country B C D E) (country estimate st_err lci uci)
drop st_err	
drop if substr(country,1,1) == "0"

replace country = substr(country,3,.)

drop if estimate == 1
destring lci, replace
destring uci, replace

replace estimate = 100*estimate
replace lci = 100*lci
replace uci = 100*uci

encode country, gen(country_encoded) // can do labmask later to get this correct
gen label_encoded = .
replace label_encoded = 1 if label == "unadjusted"
replace label_encoded = 2 if label == "adjusted"

* Suppressing based on NHANES criteria
gen abs_ci_width = uci-lci
gen rel_ci_width = abs_ci_width/est*100
replace estimate = . if ///
	abs_ci_width >= 30 | ///
	(inrange(abs_ci_width,5.0000001,29.999999) & rel_ci_width >130) | ///
	estimate == 0 | estimate == 100
	
drop country label 
gsort + country_encoded + label_encoded
reshape wide est lci uci abs_ci_width rel_ci_width, i(country_encoded) j(label_encoded)

* Generating delta
gen delta = estimate2 - estimate1
sum delta, d

* Generating ratio
gen ratio = estimate2 / estimate1 - 1
sum ratio, d

* Merge in countryGDPclass
decode country_encoded, gen(country)
merge 1:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/countryGDPclass $date.dta"
drop if inlist(_merge,1,2)
drop _merge

* median + IQRs
tabstat estimate1, statistics( median p25 p75 ) format(%9.1f)
tabstat estimate2, statistics( median p25 p75 ) format(%9.1f)
tabstat delta, statistics( median p25 p75 ) format(%9.1f)
tabstat ratio, statistics( median p25 p75 ) format(%9.3f)

* Simple regression and storing output
	regress estimate2 estimate1
	local r2: display %5.3f e(r2)
	  
	* Constant
	local cons "`: di  %3.2f abs(_b[_cons])'"
	di "`cons'"

	* Beta coefficient
	local beta "`: di  %5.3f _b[estimate1]'"
	di "`beta'"

	* Adding or subtract constant
	local add_or_subtract "`=cond(_b[_cons]>0, "+", "–")'"
	di "`add_or_subtract'"

	* Putting it together
	local eq `" y = `beta'x `add_or_subtract' `cons'"'  //  "x " `add_or_subtract' "'
	di "`eq'"

* Lancet colors
	local lancet_1 `" "0 70 139" "' // #00468BFF blue
	local lancet_2 `" "237 0 0" "' // #ED0000FF red
	local lancet_3 `" "66 181 64" "' // #42B540FF green
	local lancet_4 `" "0 153 180" "' // #0099B4FF teal 
	local lancet_5 `" "146 94 159" "' // #925E9FFF purple
	local lancet_6 `" "253 175 145" "' // #FDAF91FF peach
	local lancet_7 `" "173 0 42" "' // #AD002AFF maroon
	local lancet_8 `" "248 194 23" "' // #F8C217 grey -> convert to gold

* Local macros to control images
local l_text_size "2.5rs"	
local l_axis_title_size "3.0rs"
local l_lab_size "2.5rs"
local l_text_size_legend "2.5rs"	
local l_msize "1.25rs"
local l_width "0.2rs"

twoway 	function y=x ///
		, ///
			range(0 30) ///
			lcolor(black%100)  ///
			lwidth(`l_width')  ///
			lpattern(line) || ///
		scatter estimate2 estimate1 if countryGDPclass == 1	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_1'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 2	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_2'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 3	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_3'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 4	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_8'%50"') ///
			mlcolor(none) ///
			msize(`l_msize')  || ///
		lfit estimate2 estimate1 ///
		, ///
			/// range(0 30) ///
			lcolor(black%50)  ///
			lwidth(`l_width')  ///
			lpattern(shortdash) ///
		/// options
		ytitle("Measurement-adjusted prevalence (%)", margin(l=0 r=2rs b=0 t=0) size(`l_axis_title_size'))							///
		xtitle("Measurement-unadjusted prevalence (%)", margin(l=0 r=0 b=0 t=2rs) size(`l_axis_title_size')) 	///
		yscale(ra(0 30)) ylabel(0(5)30, labsize(`l_lab_size'))   ///
		xscale(ra(0 30)) xlabel(0(5)30, labsize(`l_lab_size'))   ///
		/// legend
			legend(order(2 3 4 5) ///
					label(2 "Low-income countries") ///
					label(3 "Lower-middle-income countries") ///
					label(4 "Upper-middle-income countries") ///
					label(5 "High-income countries") ///
				rows(2) size(`l_text_size_legend') ring(1) position(6) region(color(none) lwidth(none)) ///
				bmargin(l=0 r=0 b=0 t=0) colgap(2rs) rowgap(1rs) keygap(1rs) region(lcolor(black))) ///
		xsize(6)   ///
		ysize(6)   ///
		graphregion(color(none) lcolor(none) margin(l=2rs r=5rs b=2rs t=5rs)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		caption("`eq', R{superscript:2}=`r2'",ring(0) pos(11) margin(l=2rs r=0rs t=2rs b=0rs) size(`l_axis_title_size') ) ///
		name(prevalence_lrd, replace) //

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Figures"
graph save "prevalence_lrd ${date}.gph", replace         
graph export "prevalence_lrd ${date}.pdf", as(pdf) name(prevalence_lrd) replace
graph export "prevalence_lrd ${date}.png", as(png) name(prevalence_lrd) replace
		
frame change default
frame drop temp_frame

/******************************************************************************
Sensitivity analysis with local residual draws: % diagnosed results
*******************************************************************************/

frame create temp_frame
frame change temp_frame	

* This step imports and saves the UNADJUSTED estimates -------------------------
use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi $date.dta", clear

* This step imports and saves the ADJUSTED estimates ----------------------------
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Putexcel files"
putexcel set "weq_c_diag_mi_lrd ${date}.xlsx", replace 

estimates restore weq_c_diag_mi_lrd //  Restoring estimates	
estimates replay weq_c_diag_mi_lrd, baselevels

putexcel (A1) = etable	// exporting the whole regression table

import excel "weq_c_diag_mi_lrd ${date}.xlsx", sheet("Sheet1") cellrange(A3) firstrow clear

rename diag_mi_lrdcountry_encoded country
gen label = "adjusted"

destring C, replace force
destring D, replace
destring E, replace

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_mi_lrd $date.dta", replace

* Do some analysis and make graphs ---------------------------------------------

append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi $date.dta"

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi lrd appended $date.dta", replace

rename (country B C D E) (country estimate st_err lci uci)
drop st_err	
drop if substr(country,1,1) == "0"

replace country = substr(country,3,.)

drop if estimate == 1
destring lci, replace
destring uci, replace

replace estimate = 100*estimate
replace lci = 100*lci
replace uci = 100*uci

encode country, gen(country_encoded) // can do labmask later to get this correct
gen label_encoded = .
replace label_encoded = 1 if label == "unadjusted"
replace label_encoded = 2 if label == "adjusted"

* Suppressing based on NHANES criteria
gen abs_ci_width = uci-lci
gen rel_ci_width = abs_ci_width/est*100
replace estimate = . if ///
	abs_ci_width >= 30 | ///
	(inrange(abs_ci_width,5.0000001,29.999999) & rel_ci_width >130) | ///
	estimate == 0 | estimate == 100
	
drop country label 
gsort + country_encoded + label_encoded
reshape wide est lci uci abs_ci_width rel_ci_width, i(country_encoded) j(label_encoded)

* Generating delta
gen delta = estimate2 - estimate1
sum delta, d

* Generating ratio
gen ratio = estimate2 / estimate1 - 1
sum ratio, d

* Merge in countryGDPclass
decode country_encoded, gen(country)
merge 1:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/countryGDPclass $date.dta"
drop if inlist(_merge,1,2)
drop _merge

* median + IQRs
tabstat estimate1, statistics( median p25 p75 ) format(%9.1f)
tabstat estimate2, statistics( median p25 p75 ) format(%9.1f)
tabstat delta, statistics( median p25 p75 ) format(%9.1f)
tabstat ratio, statistics( median p25 p75 ) format(%9.3f)

* Simple regression and storing output
	regress estimate2 estimate1
	local r2: display %5.3f e(r2)
	  
	* Constant
	local cons "`: di  %3.2f abs(_b[_cons])'"
	di "`cons'"

	* Beta coefficient
	local beta "`: di  %5.3f _b[estimate1]'"
	di "`beta'"

	* Adding or subtract constant
	local add_or_subtract "`=cond(_b[_cons]>0, "+", "–")'"
	di "`add_or_subtract'"

	* Putting it together
	local eq `" y = `beta'x `add_or_subtract' `cons'"'  //  "x " `add_or_subtract' "'
	di "`eq'"

* Lancet colors
	local lancet_1 `" "0 70 139" "' // #00468BFF blue
	local lancet_2 `" "237 0 0" "' // #ED0000FF red
	local lancet_3 `" "66 181 64" "' // #42B540FF green
	local lancet_4 `" "0 153 180" "' // #0099B4FF teal 
	local lancet_5 `" "146 94 159" "' // #925E9FFF purple
	local lancet_6 `" "253 175 145" "' // #FDAF91FF peach
	local lancet_7 `" "173 0 42" "' // #AD002AFF maroon
	local lancet_8 `" "248 194 23" "' // #F8C217 grey -> convert to gold

* Local macros to control images
local l_text_size "2.5rs"	
local l_axis_title_size "3.0rs"
local l_lab_size "2.5rs"
local l_text_size_legend "2.5rs"	
local l_msize "1.25rs"
local l_width "0.2rs"

twoway 	function y=x ///
		, ///
			range(0 100) ///
			lcolor(black%100)  ///
			lwidth(`l_width')  ///
			lpattern(line) || ///
		scatter estimate2 estimate1 if countryGDPclass == 1	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_1'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 2	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_2'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 3	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_3'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter estimate2 estimate1 if countryGDPclass == 4	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_8'%50"') ///
			mlcolor(none) ///
			msize(`l_msize')  || ///
		lfit estimate2 estimate1 ///
		, ///
			/// range(0 30) ///
			lcolor(black%50)  ///
			lwidth(`l_width')  ///
			lpattern(shortdash) ///
		/// options
		ytitle("Measurement-adjusted % diagnosed", margin(l=0 r=2rs b=0 t=0) size(`l_axis_title_size'))							///
		xtitle("Measurement-unadjusted % diagnosed", margin(l=0 r=0 b=0 t=2rs) size(`l_axis_title_size')) 	///
		yscale(ra(0 100)) ylabel(0(20)100, labsize(`l_lab_size'))   ///
		xscale(ra(0 100)) xlabel(0(20)100, labsize(`l_lab_size'))   ///
		/// legend
			legend(order(2 3 4 5) ///
					label(2 "Low-income countries") ///
					label(3 "Lower-middle-income countries") ///
					label(4 "Upper-middle-income countries") ///
					label(5 "High-income countries") ///
				rows(2) size(`l_text_size_legend') ring(1) position(6) region(color(none) lwidth(none)) ///
				bmargin(l=0 r=0 b=0 t=0) colgap(2rs) rowgap(1rs) keygap(1rs) region(lcolor(black))) ///
		xsize(6)   ///
		ysize(6)   ///
		graphregion(color(none) lcolor(none) margin(l=2rs r=5rs b=2rs t=5rs)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		caption("`eq', R{superscript:2}=`r2'",ring(0) pos(11) margin(l=2rs r=0rs t=2rs b=0rs) size(`l_axis_title_size') ) ///
		name(diagnosed_lrd, replace) //		

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Figures"
graph save "diagnosed_lrd ${date}.gph", replace         
graph export "diagnosed_lrd ${date}.pdf", as(pdf) name(diagnosed_lrd) replace
graph export "diagnosed_lrd ${date}.png", as(png) name(diagnosed_lrd) replace

frame change default
frame drop temp_frame

/*******************************************************************************
********************************************************************************

Sensitivity analysis using Selvin approach

Fang M, Wang D, Coresh J, Selvin E. Undiagnosed Diabetes in U.S. Adults: Prevalence and Trends. Diabetes Care 2022; 45(9): 1994-2002.

********************************************************************************
********************************************************************************/

frame create temp_frame
frame change temp_frame	

* Unadjusted prevalence
use "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_prev_dm_sr_fbg $date.dta", clear

gen indicator = .
replace indicator = 1 if substr(country,1,1) == "0" // "prev_0"
replace indicator = 2 if substr(country,1,1) == "1" // "prev_1"

* Unadjusted % with diagnosed diabetes, among all with diabetes
append using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/weq_c_diag_nomi $date.dta"

replace indicator = 3 if substr(country,1,1) == "0" & missing(indicator) // "diag_nomi_0" 
replace indicator = 4 if substr(country,1,1) == "1" & missing(indicator) // "diag_nomi_1"

rename (country B C D E) (country estimate st_err lci uci)
drop st_err	label

replace country = substr(country,3,.)

drop if estimate == 1
destring lci, replace
destring uci, replace

replace estimate = 100*estimate
replace lci = 100*lci
replace uci = 100*uci

* Suppressing based on NHANES criteria
gen abs_ci_width = uci-lci
gen rel_ci_width = abs_ci_width/est*100
replace estimate = . if ///
	abs_ci_width >= 30 | ///
	(inrange(abs_ci_width,5.0000001,29.999999) & rel_ci_width >130) | ///
	estimate == 0 | estimate == 100

encode country, gen(country_encoded) // can do labmask later to get this correct
drop abs_ci_width rel_ci_width
drop lci uci

reshape wide est, i(country_encoded) j(indicator)

gen selvin_prevalence = (estimate2 * (estimate4/100)) + /// diagnosed proportion
						(estimate2 * 0.76 * (estimate3/100))  // undiagnosed diabetes

gen selvin_diagnosed = 100*(estimate4/100) / (0.76 * (estimate3/100) + (estimate4/100))

gsort + country_encoded

// * Generating delta
// gen delta = estimate2 - estimate1
// sum delta, d
//
// * Generating ratio
// gen ratio = estimate2 / estimate1 - 1
// sum ratio, d

* Merge in countryGDPclass
drop country
decode country_encoded, gen(country)
merge 1:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Temporary files/countryGDPclass $date.dta"
drop if inlist(_merge,1,2)
drop _merge

* Selvin prevalence figure -----------------------------------------------------

* Simple regression and storing output
	regress selvin_prevalence estimate2 
	local r2: display %5.3f e(r2)
	  
	* Constant
	local cons "`: di  %3.2f abs(_b[_cons])'"
	di "`cons'"

	* Beta coefficient
	local beta "`: di  %5.3f _b[estimate1]'"
	di "`beta'"

	* Adding or subtract constant
	local add_or_subtract "`=cond(_b[_cons]>0, "+", "–")'"
	di "`add_or_subtract'"

	* Putting it together
	local eq `" y = `beta'x `add_or_subtract' `cons'"'  //  "x " `add_or_subtract' "'
	di "`eq'"

* Lancet colors
	local lancet_1 `" "0 70 139" "' // #00468BFF blue
	local lancet_2 `" "237 0 0" "' // #ED0000FF red
	local lancet_3 `" "66 181 64" "' // #42B540FF green
	local lancet_4 `" "0 153 180" "' // #0099B4FF teal 
	local lancet_5 `" "146 94 159" "' // #925E9FFF purple
	local lancet_6 `" "253 175 145" "' // #FDAF91FF peach
	local lancet_7 `" "173 0 42" "' // #AD002AFF maroon
	local lancet_8 `" "248 194 23" "' // #F8C217 grey -> convert to gold

* Local macros to control images
local l_text_size "2.5rs"	
local l_axis_title_size "3.0rs"
local l_lab_size "2.5rs"
local l_text_size_legend "2.5rs"	
local l_msize "1.25rs"
local l_width "0.2rs"

twoway 	function y=x ///
		, ///
			range(0 30) ///
			lcolor(black%100)  ///
			lwidth(`l_width')  ///
			lpattern(line) || ///
		scatter selvin_prevalence estimate2  if countryGDPclass == 1	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_1'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter selvin_prevalence estimate2  if countryGDPclass == 2	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_2'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter selvin_prevalence estimate2  if countryGDPclass == 3	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_3'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter selvin_prevalence estimate2  if countryGDPclass == 4	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_8'%50"') ///
			mlcolor(none) ///
			msize(`l_msize')  || ///
		lfit selvin_prevalence estimate2  ///
		, ///
			/// range(0 30) ///
			lcolor(black%50)  ///
			lwidth(`l_width')  ///
			lpattern(shortdash) ///
		/// options
		ytitle("Measurement-adjusted prevalence (%)", margin(l=0 r=2rs b=0 t=0) size(`l_axis_title_size'))							///
		xtitle("Measurement-unadjusted prevalence (%)", margin(l=0 r=0 b=0 t=2rs) size(`l_axis_title_size')) 	///
		yscale(ra(0 30)) ylabel(0(5)30, labsize(`l_lab_size'))   ///
		xscale(ra(0 30)) xlabel(0(5)30, labsize(`l_lab_size'))   ///
		/// legend
			legend(order(2 3 4 5) ///
					label(2 "Low-income countries") ///
					label(3 "Lower-middle-income countries") ///
					label(4 "Upper-middle-income countries") ///
					label(5 "High-income countries") ///
				rows(2) size(`l_text_size_legend') ring(1) position(6) region(color(none) lwidth(none)) ///
				bmargin(l=0 r=0 b=0 t=0) colgap(2rs) rowgap(1rs) keygap(1rs) region(lcolor(black))) ///
		xsize(6)   ///
		ysize(6)   ///
		graphregion(color(none) lcolor(none) margin(l=2rs r=5rs b=2rs t=5rs)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		caption("`eq', R{superscript:2}=`r2'",ring(0) pos(11) margin(l=2rs r=0rs t=2rs b=0rs) size(`l_axis_title_size') ) ///
		name(selvin_prevalence, replace) //

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Figures"
graph save "selvin_prevalence ${date}.gph", replace         
graph export "selvin_prevalence ${date}.pdf", as(pdf) name(selvin_prevalence) replace
graph export "selvin_prevalence ${date}.png", as(png) name(selvin_prevalence) replace

* Selvin diagnosed figure ------------------------------------------------------

* Simple regression and storing output
	regress selvin_diagnosed estimate4
	local r2: display %5.3f e(r2)
	  
	* Constant
	local cons "`: di  %3.2f abs(_b[_cons])'"
	di "`cons'"

	* Beta coefficient
	local beta "`: di  %5.3f _b[estimate1]'"
	di "`beta'"

	* Adding or subtract constant
	local add_or_subtract "`=cond(_b[_cons]>0, "+", "–")'"
	di "`add_or_subtract'"

	* Putting it together
	local eq `" y = `beta'x `add_or_subtract' `cons'"'  //  "x " `add_or_subtract' "'
	di "`eq'"

* Lancet colors
	local lancet_1 `" "0 70 139" "' // #00468BFF blue
	local lancet_2 `" "237 0 0" "' // #ED0000FF red
	local lancet_3 `" "66 181 64" "' // #42B540FF green
	local lancet_4 `" "0 153 180" "' // #0099B4FF teal 
	local lancet_5 `" "146 94 159" "' // #925E9FFF purple
	local lancet_6 `" "253 175 145" "' // #FDAF91FF peach
	local lancet_7 `" "173 0 42" "' // #AD002AFF maroon
	local lancet_8 `" "248 194 23" "' // #F8C217 grey -> convert to gold

* Local macros to control images
local l_text_size "2.5rs"	
local l_axis_title_size "3.0rs"
local l_lab_size "2.5rs"
local l_text_size_legend "2.5rs"	
local l_msize "1.25rs"
local l_width "0.2rs"

twoway 	function y=x ///
		, ///
			range(0 100) ///
			lcolor(black%100)  ///
			lwidth(`l_width')  ///
			lpattern(line) || ///
		scatter selvin_diagnosed estimate4  if countryGDPclass == 1	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_1'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter selvin_diagnosed estimate4  if countryGDPclass == 2	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_2'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter selvin_diagnosed estimate4  if countryGDPclass == 3	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_3'%50"') ///
			mlcolor(none) ///
			msize(`l_msize') || ///
		scatter selvin_diagnosed estimate4  if countryGDPclass == 4	///
		,  ///
			msymbol(circle) ///
			mcolor(`"`lancet_8'%50"') ///
			mlcolor(none) ///
			msize(`l_msize')  || ///
		lfit selvin_diagnosed estimate4  ///
		, ///
			/// range(0 30) ///
			lcolor(black%50)  ///
			lwidth(`l_width')  ///
			lpattern(shortdash) ///
		/// options
		ytitle("Measurement-adjusted % diagnosed", margin(l=0 r=2rs b=0 t=0) size(`l_axis_title_size'))							///
		xtitle("Measurement-unadjusted % diagnosed", margin(l=0 r=0 b=0 t=2rs) size(`l_axis_title_size')) 	///
		yscale(ra(0 100)) ylabel(0(20)100, labsize(`l_lab_size'))   ///
		xscale(ra(0 100)) xlabel(0(20)100, labsize(`l_lab_size'))   ///
		/// legend
			legend(order(2 3 4 5) ///
					label(2 "Low-income countries") ///
					label(3 "Lower-middle-income countries") ///
					label(4 "Upper-middle-income countries") ///
					label(5 "High-income countries") ///
				rows(2) size(`l_text_size_legend') ring(1) position(6) region(color(none) lwidth(none)) ///
				bmargin(l=0 r=0 b=0 t=0) colgap(2rs) rowgap(1rs) keygap(1rs) region(lcolor(black))) ///
		xsize(6)   ///
		ysize(6)   ///
		graphregion(color(none) lcolor(none) margin(l=2rs r=5rs b=2rs t=5rs)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
		caption("`eq', R{superscript:2}=`r2'",ring(0) pos(11) margin(l=2rs r=0rs t=2rs b=0rs) size(`l_axis_title_size') ) ///
		name(selvin_diagnosed, replace) //		

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/NHANES III measurement/Figures"
graph save "selvin_diagnosed ${date}.gph", replace         
graph export "selvin_diagnosed ${date}.pdf", as(pdf) name(selvin_diagnosed) replace
graph export "selvin_diagnosed ${date}.png", as(png) name(selvin_diagnosed) replace
		
frame change default
frame drop temp_frame

log close
